---
title: "Analysing data from study 2 experiment"
output: html_document
---

# 1. Basic system setup

```{r setup, include=FALSE}
knitr::opts_chunk$set(cache=FALSE)

# load libraries
library('tidyverse')
library('survey')
library('stargazer')
library('MASS')
library('knitr')
library('kableExtra')
library('psych')
library('nFactors')
library('moments')
library('ggeffects')
library('mediation')
library('ordinal')

# set ggplot theme
theme_set(theme_gray() +
            theme(panel.grid.major = element_blank(),
            panel.grid.minor = element_blank(),
            panel.background = element_rect(colour = "black",
                                            size = 1, fill = NA)))
```


# 2. Loading data: study 2 experiment
```{r}
load(file = "replication_dataset_study2_experiment.RData")
```

# 3. Check internal consistency for combined variables

## for grp_malleaby

Calculate the Cronbach's alpha with complete cases
```{r}
alpha_grp_malleaby <- study2_exp %>%
                      dplyr::select(V16,V22,V26,V28) %>%
                      filter(complete.cases(.))
psych::alpha(alpha_grp_malleaby)$total$std.alpha
```

To calculate omega (an alternative measure of internal consistency)
```{r}
MBESS::ci.reliability(data=alpha_grp_malleaby, type="omega", conf.level = 0.95,
interval.type="bca", B=1000)
```


To check the number of factors for the components of grp_malleaby

### factor analysis

```{r}

n.factors <- 1

fit <- factanal(alpha_grp_malleaby, 
                n.factors,                # number of factors to extract
                scores=c("regression"),
                n.obs=nrow(alp_grp_malleaby))

print(fit, digits=2, cutoff=.3, sort=TRUE)

```


```{r}
head(fit$scores)
```


```{r}
corMat <- cor(alpha_grp_malleaby)

(faPC  <- fa(r=corMat, nfactors=1, rotate="varimax", n.obs = nrow(alpha_grp_malleaby)))
```


```{r}

# Determine Number of Factors to Extract
ev <- eigen(cor(alpha_grp_malleaby)) # get eigenvalues
ap <- parallel(subject=nrow(alpha_grp_malleaby),
               var=ncol(alpha_grp_malleaby),
               rep=100,
               cent=.05)
nS <- nScree(x=ev$values, aparallel=ap$eigen$qevpea)
plotnScree(nS)

```

## for ntl_homogty
Calculate the Cronbach's alpha with complete cases
```{r}
alpha_ntl_homogty <- study2_exp %>%
                      dplyr::select(V15,V34,V47,V50) %>%
                      filter(complete.cases(.))
psych::alpha(alpha_ntl_homogty)$total$std.alpha
```


To calculate omega (an alternative measure of internal consistency)
```{r}
MBESS::ci.reliability(data=alpha_ntl_homogty, type="omega", conf.level = 0.95,
interval.type="bca", B=1000)

```

## for stereotypes
Calculate the Cronbach's alpha with complete cases
```{r}
alpha_stereotypes <- study2_exp %>%
                      dplyr::select(V19:V20) %>%
                      filter(complete.cases(.))
psych::alpha(alpha_stereotypes)$total$std.alpha
```


To calculate omega (an alternative measure of internal consistency)
```{r}
MBESS::ci.reliability(data=alpha_stereotypes, type="omega", conf.level = 0.95,
interval.type="bca", B=1000)
```

## for contact_immigrt
Calculate the Cronbach's alpha with complete cases
```{r}
alpha_contact_immigrt <- study2_exp %>%
                          dplyr::select(V54:V55) %>%
                          filter(complete.cases(.))
psych::alpha(alpha_contact_immigrt)$total$std.alpha
```


To calculate omega (an alternative measure of internal consistency)
```{r}
MBESS::ci.reliability(data=alpha_contact_immigrt, type="omega", conf.level = 0.95,
interval.type="bca", B=1000)
```


# 4. Frequency distribution of attitudes towards migrant

## Appendix E: FREQUENCY DISTRIBUTION OF MIGRATION ATTITUDES IN STUDIES 2

Distribution of attitudes towards migrants before recoding
```{r}

ggplot(data = study2_exp, aes(x = V7)) +
  geom_histogram(binwidth = 0.5) +
  labs(x = "Attitudes towards migrants")

```

```{r}
skewness(study2_exp$V7)
```


# 5. Summary statistics ---------------------------------------------------

```{r}
# report
summary_stat_study2_exp <- as.data.frame(study2_exp)
summary_stat_study2_exp <- summary_stat_study2_exp %>% dplyr::select(version, ntl_homogty,  grp_malleaby,
                                               V7, stereotypes, V42, V48,
                                               V8, V9, V11,
                                               contact_immigrt, V63, V59, V64)

# rename variables                                               
colnames(summary_stat_study2_exp) <- c("version",
                            "ntl_homogty",
                            "grp_malleaby",
                            "attitu_immigrt",
                            "stereotypes",
                            "social_distance",
                            "cultural_threat",
                            "economic_threat",
                            "satisfied_hk_econ",
                            "proud_HKer",
                            "contact_immigrt",
                            "household_income",
                            "sex",
                            "place_of_birth")

# this returns the summary stat for one variable by group version.
# think about how to apply it to all variables

mean_v1 <- lapply(summary_stat_study2_exp[summary_stat_study2_exp$version == 1,],
                  mean, na.rm=TRUE) %>% as.data.frame() %>% t()
mean_v2 <- lapply(summary_stat_study2_exp[summary_stat_study2_exp$version == 2,],
                  mean, na.rm=TRUE) %>% as.data.frame() %>% t()
sd_v1 <- lapply(summary_stat_study2_exp[summary_stat_study2_exp$version == 1,],
                  sd, na.rm=TRUE) %>% as.data.frame() %>% t()
sd_v2 <- lapply(summary_stat_study2_exp[summary_stat_study2_exp$version == 2,],
                  sd, na.rm=TRUE) %>% as.data.frame() %>% t()
min_v1 <- lapply(summary_stat_study2_exp[summary_stat_study2_exp$version == 1,],
                min, na.rm=TRUE) %>% as.data.frame() %>% t()
min_v2 <- lapply(summary_stat_study2_exp[summary_stat_study2_exp$version == 2,],
                min, na.rm=TRUE) %>% as.data.frame() %>% t()
max_v1 <- lapply(summary_stat_study2_exp[summary_stat_study2_exp$version == 1,],
                max, na.rm=TRUE) %>% as.data.frame() %>% t()
max_v2 <- lapply(summary_stat_study2_exp[summary_stat_study2_exp$version == 2,],
                max, na.rm=TRUE) %>% as.data.frame() %>% t()
obs_v1 <- lapply(summary_stat_study2_exp[summary_stat_study2_exp$version == 1,],
                 function(x) { length(which(!is.na(x)))} ) %>%
                      as.data.frame() %>% t()
obs_v2 <- lapply(summary_stat_study2_exp[summary_stat_study2_exp$version == 2,],
                 function(x) { length(which(!is.na(x)))} ) %>%
                      as.data.frame() %>% t()

tt_tvalue <- c()
tt_pvalue <- c()
tt_df <- c()
for (i in names(summary_stat_study2_exp)[2:length(summary_stat_study2_exp)]) {
  #i <- as.name(i)
  g1 <- summary_stat_study2_exp[i][summary_stat_study2_exp$version == 1,]
  g2 <- summary_stat_study2_exp[i][summary_stat_study2_exp$version == 2,]
  
  tt_temp <- t.test(g1, g2)
  
  tt_tvalue_temp <- as.data.frame(tt_temp$statistic)
  rownames(tt_tvalue_temp) <- i
  
  tt_pvalue_temp <- as.data.frame(tt_temp$p.value)
  rownames(tt_pvalue_temp) <- i
  
  tt_df_temp <- as.data.frame(length(na.omit(g1))+length(na.omit(g2))-2)
  rownames(tt_df_temp) <- i
  
  tt_tvalue <- rbind(tt_tvalue, tt_tvalue_temp)
  tt_pvalue <- rbind(tt_pvalue, tt_pvalue_temp)
  tt_df <- rbind(tt_df, tt_df_temp)
  
}
# rename columns
colnames(tt_tvalue) <- "t_value"
colnames(tt_pvalue) <- "p_value"
colnames(tt_df) <- "df"
# add first item for version
tt_tvalue <- rbind(data.frame(t_value = NA), tt_tvalue)
tt_pvalue <- rbind(data.frame(p_value = NA), tt_pvalue)
tt_df <- rbind(data.frame(df = NA), tt_df)
# add rowname for version
rownames(tt_tvalue)[1] <- rownames(tt_pvalue)[1] <- "version"


# create final table
stat_table <- data.frame(mean_v1 = mean_v1, sd_v1 = sd_v1,
                         min_v1 = min_v1, max_v1 = max_v1,
                         obs_v1 = obs_v1,
                         mean_v2 = mean_v2, sd_v2 = sd_v2,
                         min_v2 = min_v2, max_v2 = max_v2,
                         obs_v2 = obs_v2)
stat_table <- cbind(stat_table, tt_tvalue, tt_pvalue, tt_df)

rownames(stat_table) <- c("version",
                            "Perceived national homogeneity",
                            "Perceived group malleability",
                            "Attitudes toward migrants",
                            "Stereotypes",
                            "Social distance",
                            "Cultural threat",
                            "Economic threat",
                            "Satisfied with Hong Kong's economy",
                            "Proud of ��HKer�� identity",
                            "Contact with migrants",
                            "Family income",
                            "Sex",
                            "Place of birth")
stat_table <- stat_table[-1,]

```


## Table 5: Summary Statistics of the Experiment in Study 2

This table provides a simple t-test to check the group differences between version 1 and version 2. It contains the t values and p-values and is on the right hand side of the table.

```{r results='asis'}
#stargazer(stat_table, type = "text", summary = FALSE, digits = 2) ## alternative output format
kable(stat_table, format = "html", booktabs = TRUE, digits = 2) %>% 
  add_header_above(c(" ", "Heterogeneous Group" = 5, "Homogeneous Group" = 5,
                     "Difference in Means" = 3)) %>%
  kable_styling(bootstrap_options = c("hover", "condensed"), full_width = F,
                 latex_options = "scale_down") %>% 
  column_spec(12:14, bold = TRUE)
```


# 5. Regression analyses --------------------------------------------------

Create a data frame by selecting the variables needed, declare them either as ordered factor or factor.

```{r}
# declare variables as ordered factor except some
df.test <- summary_stat_study2_exp %>% 
  mutate_all(as.numeric) %>% 
  mutate_at(vars(-attitu_immigrt, -ntl_homogty, -grp_malleaby, -stereotypes,
                 -satisfied_hk_econ, -proud_HKer, -contact_immigrt,
                 -version, -place_of_birth, -sex),
            # these variables are filtered out and leave it as numeric
            as.ordered) %>% 
  mutate_at(vars(attitu_immigrt, version, place_of_birth, sex), as.factor) # declare as factor

```

Run models

```{r}
m1.1_study2_exp <- lm(grp_malleaby ~
             version,
           data = df.test)

m1.2_study2_exp <- lm(grp_malleaby ~
                    version +
                    satisfied_hk_econ +
                    proud_HKer +
                    contact_immigrt +
                    household_income +
                    place_of_birth +
                    sex,
                  data = df.test)

m2.1_study2_exp <- polr(attitu_immigrt ~
                  version +
                  satisfied_hk_econ +
                  proud_HKer +
                  contact_immigrt +
                  household_income +
                  place_of_birth +
                  sex,
                  data = df.test,
                  method = "logistic",
                  Hess = TRUE)

m2.2_study2_exp <- polr(attitu_immigrt ~
                  grp_malleaby +
                  satisfied_hk_econ +
                  proud_HKer +
                  contact_immigrt +
                  household_income +
                  place_of_birth +
                  sex,
                  data = df.test,
                  method = "logistic",
                  Hess = TRUE)

```

## Table 6. Effects of the Treatment on Perceived Group Malleability and Attitudes Toward Migrants in Study 2

```{r results='asis', warning=FALSE, message=FALSE}

stargazer(m1.2_study2_exp, m2.1_study2_exp, m2.2_study2_exp,
            type = "text",
            digits = 2,
            dep.var.labels = c("Group malleability",
                            "Attitudes towards immigrants"),
          covariate.labels = c("Treatment (homogeneous condition",
                               "Group malleability",
                               "Satisfied with Hong Kong economy",
                               "Proud of \"HKer\" identity",
                               "Contact with migrants",
                               "Family income: HK$10,001�V25,000",
                               "Family income: HK$25,000+",
                               "Place of birth",
                               "Sex (Female)"),
          notes = "Entries are coefficients from various types of regressions; standard errors are in parentheses.",
          notes.align = "l",
          column.sep.width = "1pt")
```


# 6 Predicted values of perceived group malleability ---------------------------------------------


```{r eval=FALSE, include=TRUE}
m1.2_study2_exp <- lm(grp_malleaby ~
                    version +
                    satisfied_hk_econ +
                    proud_HKer +
                    contact_immigrt +
                    household_income +
                    place_of_birth +
                    sex,
                  data = df.test)
```


```{r results='asis'}
df.predict.prod.plot <- ggpredict(m1.2_study2_exp, terms = "version")
df.predict.prod.plot$x <- as.factor(df.predict.prod.plot$x) # set as factor

df.predict.prod.plot
```

## Figure 2. Predicted levels of malleability beliefs in Study 2

```{r}
df.predict.prod.plot$x <- c("Heterogeneous Group", "Homogeneous Group")

ggplot(df.predict.prod.plot, aes(x = x, y = predicted)) +
  geom_point() +
  geom_errorbar(aes(ymax = conf.high, ymin = conf.low),
                width = 0.2) +
  labs(y = "Predicted values of perceived group malleability",
       x = "Conditions")
```


# 7 Indirect effect of implicit beliefs ---------------------------------------------


Recode the attitudes towards immigrants to binary for the mediation analysis
```{r}
# combine 1-2; 3-5
df.test$attitu_immigrt <- as.numeric(df.test$attitu_immigrt)
df.test$attitu_immigrt[df.test$attitu_immigrt >= 1 & df.test$attitu_immigrt <= 2 & !is.na(df.test$attitu_immigrt)] <- 1
df.test$attitu_immigrt[df.test$attitu_immigrt >= 3 & df.test$attitu_immigrt <= 5 & !is.na(df.test$attitu_immigrt)] <- 2
df.test$attitu_immigrt <- as.factor(df.test$attitu_immigrt)
```

```{r mediation model setup}
df.mediate <- df.test %>% dplyr::select(version,
             attitu_immigrt,
             grp_malleaby,
             ntl_homogty,
             satisfied_hk_econ,
             proud_HKer,
             contact_immigrt,
             household_income,
             place_of_birth,
             sex) %>% na.omit()

#df.mediate$attitu_immigrt_n_order <- ordered(df.mediate$attitu_immigrt, levels = c(1:4))

model.m <- lm(grp_malleaby ~
             version +
             satisfied_hk_econ +
             proud_HKer +
             contact_immigrt +
             household_income +
             sex +
             place_of_birth,
           data = df.mediate)
```

```{r}

model.y <- glm(attitu_immigrt ~
             grp_malleaby +
             version +
             satisfied_hk_econ +
             proud_HKer +
             contact_immigrt +
             household_income +
             sex +
             place_of_birth,
           data = df.mediate,
           family=binomial(link='probit'))

```

```{r mediation model, error = FALSE, warning=FALSE}
out.1 <- mediate(model.m, model.y, sims = 1000, boot = TRUE,
                 treat = "version", mediator = "grp_malleaby",
                 control.value = 1, treat.value = 2,
                 conf.level = .95)
summary(out.1)

```

## Table 7. Mediation Effects of Implicit Beliefs About Group Malleability on Support Toward Migrants in Study 2

```{r}
cma <- summary(out.1)
acme <- c(cma$d.avg, cma$d.avg.ci)
direct_effect <- c(cma$z.avg, cma$z.avg.ci) 
total_effect <- c(cma$tau.coef, cma$tau.ci) 
round(rbind(acme, direct_effect, total_effect),2)
```


```{r mediation effect plot, error = TRUE}
plot(out.1)
```

Sensitivity analysis

```{r}
sens.out <- medsens(out.1, rho.by = 0.1, effect.type = "indirect", sims = 1000)
summary(sens.out)
```

## Appendix H: Sensitivity analysis for study 2 (fig. S2)

```{r}
plot(sens.out)
```

```{r}

```

